
### This script reproduces Table H9 that shows a robustness check on 
### Hypothesis 3 with weighted data from the Navalny survey

library(survey)

##### Compute models 1 & 2 in Table H9: complete 2013 sample #####

#### compare distribution of variables in 2013 and Navalny survey

# female
prop.table(table(data1$female))  

# make age categories
data1$agecat <- with(data1,
                          ifelse(age<30,1,
                                 ifelse(age<45,2,
                                        ifelse(age<60,3,4))))
data2$agecat <- with(data2,
                   ifelse(age<30,1,
                          ifelse(age<45,2,
                                 ifelse(age<60,3,4))))
table(data1$agecat)
table(data2$agecat)

round(prop.table(table(data1$agecat)), 3)
round(prop.table(table(data2$agecat)), 3)

# wealth
prop.table(table(pooled$wealth[pooled$nav==0]))
prop.table(table(pooled$wealth[pooled$nav==1]))

#### create weights
# remove NAs from sample
data2.test <- data2[!is.na(data2$female) & !is.na(data2$age),]

# create survey object of Navalny data
data2.svy.unweighted <- svydesign(ids=~1, data=data2.test)

# setting up population data frame(s)
female.dist <- data.frame(female = c(0, 1),
                          Freq = nrow(data2.test) * c(prop.table(table(data1$female))))
agecat.dist <- data.frame(agecat = c(1, 2, 3, 4),
                          Freq = nrow(data2.test) * c(round(prop.table(table(data1$agecat)), 3)))

# rake just for female
data2.svy.rake <- rake(design = data2.svy.unweighted,
                     sample.margins = list(~female),
                     population.margins = list(female.dist))

# rake for female and agecat
data2.svy.rake <- rake(design = data2.svy.unweighted,
                     sample.margins = list(~female, ~agecat),
                     population.margins = list(female.dist, agecat.dist))


# check weights, trim
summary(weights(data2.svy.rake)) # trim if <0.3 or >3
table(weights(data2.svy.rake))

data2.svy.rake.trim <- trimWeights(data2.svy.rake, lower=0.3, upper=3, 
                                 strict=TRUE) 
table(weights(data2.svy.rake.trim))

# add weights to data
data2.weighted <- as.data.frame(data2.svy.rake$variables)
data2.weighted$weight <- weights(data2.svy.rake)
data2.weighted$weight <- weights(data2.svy.rake.trim) # use this for trimmed weights

#### test glm with weighted and unweighted data

## merge 2013 with weithed Navalny data
mergedata1 <- with(data1,
                   data.frame(age, edu, female, wealth, poldiscST, moscow,
                              extra, agree, open, neur, consc,
                              navact1 = 0,
                              polact1, polact2, 
                              putin_sup, putin_app))
mergedata1$weight <- 1 

mergedata2.weighted <- with (data2.weighted, data.frame(age, edu, female, wealth, poldiscST, moscow,
                                                    extra, agree, open, neur, consc,
                                                    navact1 = navact, 
                                                    polact1 = 1, polact2 = 1, 
                                                    putin_sup = 0, putin_app = 0, weight))

# reduce Navalny data data to those with higher education and at least medium wealth to match data1
mergedata2.weighted <- mergedata2.weighted[mergedata2.weighted$edu>=3 
                                       & mergedata2.weighted$wealth>=3,]

# merge data sets
pooled.w <- data.frame(rbind(mergedata1, mergedata2.weighted))


# re-standardize numerical variables in pooled data set
pooled.w$extraST <- ((pooled.w$extra-mean(pooled.w$extra, na.rm = T))/(sd(pooled.w$extra, na.rm = T)))
pooled.w$agreeST <- ((pooled.w$agree-mean(pooled.w$agree, na.rm = T))/(sd(pooled.w$agree, na.rm = T)))
pooled.w$openST <- ((pooled.w$open-mean(pooled.w$open, na.rm = T))/(sd(pooled.w$open, na.rm = T)))
pooled.w$conscST <- ((pooled.w$consc-mean(pooled.w$consc, na.rm = T))/(sd(pooled.w$consc, na.rm = T)))
pooled.w$neurST <- ((pooled.w$neur-mean(pooled.w$neur, na.rm = T))/(sd(pooled.w$neur, na.rm = T)))

pooled.w$poldiscST <- ((pooled.w$poldiscST-mean(pooled.w$poldiscST, na.rm = T))/(sd(pooled.w$poldiscST, na.rm = T)))


#### run test models
pooled.w$subset <- ifelse(pooled.w$navact1==1,1,
                                  ifelse(pooled.w$putin_sup==1 & pooled.w$polact1==0,0,NA))

# run models
summary(m.h3_2.1a)

mH3_robset9_1 <- glm(subset ~ openST + conscST + extraST + agreeST + neurST,
               family = "binomial", weights = weight, data = pooled.w)

mH3_robset9_2 <- glm(subset ~ openST + conscST + extraST + agreeST + neurST +
                 age + edu + female + wealth + moscow,
               family = "binomial", weights = weight, data = pooled.w)

tab_model(mH3_robset9_1, mH3_robset9_2,
          show.se = TRUE, show.ci = FALSE, 
          p.style = "stars", collapse.se = TRUE, show.ci50 = TRUE, show.aic = TRUE)


##### Compute models 3 & 4 in Table H9: 2013 sample reduced to those with VK account #####

### reduce data
data1_short <- data1[!is.na(data1$VK),]

#### compare distribution of variables in 2013 and Navalny survey

# female
prop.table(table(data1_short$female))  

# make age categories
data1_short$agecat <- with(data1_short,
                     ifelse(age<30,1,
                            ifelse(age<45,2,
                                   ifelse(age<60,3,4))))
data1_short$agecat <- with(data1_short,
                     ifelse(age<30,1,
                            ifelse(age<45,2,
                                   ifelse(age<60,3,4))))

#### create weights
# remove NAs from sample
data2.test <- data2[!is.na(data2$female) & !is.na(data2$age),]

# create survey object of Navalny data
data2.svy.unweighted <- svydesign(ids=~1, data=data2.test)

# setting up population data frame(s)
female.dist <- data.frame(female = c(0, 1),
                          Freq = nrow(data2.test) * c(prop.table(table(data1_short$female))))
agecat.dist <- data.frame(agecat = c(1, 2, 3, 4),
                          Freq = nrow(data2.test) * c(round(prop.table(table(data1_short$agecat)), 3)))

# rake just for female
data2.svy.rake <- rake(design = data2.svy.unweighted,
                       sample.margins = list(~female),
                       population.margins = list(female.dist))

# rake for female and agecat
data2.svy.rake <- rake(design = data2.svy.unweighted,
                       sample.margins = list(~female, ~agecat),
                       population.margins = list(female.dist, agecat.dist))

# check weights, trim
summary(weights(data2.svy.rake)) # trim if <0.3 or >3
table(weights(data2.svy.rake))

data2.svy.rake.trim <- trimWeights(data2.svy.rake, lower=0.3, upper=3, 
                                   strict=TRUE) 
table(weights(data2.svy.rake.trim))

# add weights to data
data2.weighted <- as.data.frame(data2.svy.rake$variables)
data2.weighted$weight <- weights(data2.svy.rake)
data2.weighted$weight <- weights(data2.svy.rake.trim) # use this for trimmed weights

#### merge 2013 with weithed Navalny data
mergedata1 <- with(data1_short,
                   data.frame(age, edu, female, wealth, poldiscST, moscow,
                              extra, agree, open, neur, consc,
                              navact1 = 0,
                              polact1, polact2, 
                              putin_sup, putin_app))
mergedata1$weight <- 1 

mergedata2.weighted <- with (data2.weighted, data.frame(age, edu, female, wealth, poldiscST, moscow,
                                                        extra, agree, open, neur, consc,
                                                        navact1 = navact, 
                                                        polact1 = 1, polact2 = 1, 
                                                        putin_sup = 0, putin_app = 0, weight))

# reduce Navalny data data to those with higher education and at least medium wealth to match data1
mergedata2.weighted <- mergedata2.weighted[mergedata2.weighted$edu>=3 
                                           & mergedata2.weighted$wealth>=3,]

# merge data sets
pooled.w <- data.frame(rbind(mergedata1, mergedata2.weighted))

# re-standardize numerical variables in pooled data set
pooled.w$extraST <- ((pooled.w$extra-mean(pooled.w$extra, na.rm = T))/(sd(pooled.w$extra, na.rm = T)))
pooled.w$agreeST <- ((pooled.w$agree-mean(pooled.w$agree, na.rm = T))/(sd(pooled.w$agree, na.rm = T)))
pooled.w$openST <- ((pooled.w$open-mean(pooled.w$open, na.rm = T))/(sd(pooled.w$open, na.rm = T)))
pooled.w$conscST <- ((pooled.w$consc-mean(pooled.w$consc, na.rm = T))/(sd(pooled.w$consc, na.rm = T)))
pooled.w$neurST <- ((pooled.w$neur-mean(pooled.w$neur, na.rm = T))/(sd(pooled.w$neur, na.rm = T)))

pooled.w$poldiscST <- ((pooled.w$poldiscST-mean(pooled.w$poldiscST, na.rm = T))/(sd(pooled.w$poldiscST, na.rm = T)))


#### run test models
pooled.w$subset <- ifelse(pooled.w$navact1==1,1,
                          ifelse(pooled.w$putin_sup==1 & pooled.w$polact1==0,0,NA))

# run models
mH3_robset9_3 <- glm(subset ~ openST + conscST + extraST + agreeST + neurST,
                     family = "binomial", weights = weight, data = pooled.w)

mH3_robset9_4 <- glm(subset ~ openST + conscST + extraST + agreeST + neurST +
                       age + edu + female + wealth + moscow,
                     family = "binomial", weights = weight, data = pooled.w)

tab_model(mH3_robset9_3, mH3_robset9_4,
          show.se = TRUE, show.ci = FALSE, 
          p.style = "stars", collapse.se = TRUE, show.ci50 = TRUE, show.aic = TRUE)



##### create Table H9 #####

tab_model(mH3_robset9_1, mH3_robset9_2, mH3_robset9_3, mH3_robset9_4,
          show.se = TRUE, show.ci = FALSE, 
          p.style = "stars", collapse.se = TRUE, show.ci50 = TRUE, show.aic = TRUE,
          file = "Table_H9.html")


